# Replication file for: "Beyond the Hazard Ratio: Generating        #
# Expected Durations from the Cox Proportional Hazards Model"       #
#                                                                   #
# Jonathan Kropko                                                   #
# University of Virginia                                            #
# jkropko@virginia.edu                                              #
#                                                                   #
# Jeffrey J. Harden                                                 #
# University of Notre Dame                                          #
# jeff.harden@nd.edu                                                #
#                                                                   #
# cox.ed.gam() function (GAM approach)                              #
# This function computes the expected durations using the observed  #
# value method for setting independent variable profiles (see       #
# Hanmer and Kalkan 2013).                                          #
# Last update: May 20, 2017                                         #
#####################################################################
### Arguments ###
# cox.model: A coxph() object.
# var.name: Name of the independent variable of interest.
# val1: Value of var.name for new observation #1.
# val2: Value of var.name for new observation #2.
# vcv: Variance-covariance matrix to use if coefficients are simulated from a multivariate normal. The default is vcov(cox.model). This argument is not used if bootstrap = TRUE.
# m: Number of simulations or bootstrap repetitions. The default is 1000.
# ci: Confidence interval level for the expected durations. The default is 95%.
# cluster: Cluster variable if bootstrapping is to be done by clusters of observations rather than individual observations. The default is NULL (no cluster bootstrapping).
# k: Number of knots in the GAM smoother. The default is -1, which is the default in the gam() function.
# var.name2: Name of a second independent variable to set (constituent term in an interaction). The default is NA.
# val3: Value for var.name2. The default is NA.
# var.name3: Name of a third independent variable to set (interaction term). The default is NA.
# val4: Value of var.name3 for new observation #1. The default is NA.
# val5: Value of var.name3 for new observation #2. The default is NA.

### Outputs ###
# y: Dependent variable.
# failed: Censoring indicator.
# rank.xb: Expected fail time rankings for each observation.
# exp.dur: Expected duration for each observation.
# expdur.se: Standard error of the expected duration for each observation.
# x1: Design matrix for independent variable value #1.
# x2: Design matrix for independent variable value #2.
# ed.pe: Expected duration point estimates for the two new observations.
# ed.se: Expected duration standard errors for the two new observations.
# ed.lo: Expected duration lower confidence bounds for the two new observations.
# ed.hi: Expected duration upper confidence bounds for the two new observations.
# diff.pe: First difference in expected duration point estimates for the two new observations.
# diff.se: First difference in expected duration standard errors for the two new observations.
# diff.lo: First difference in expected duration lower confidence bounds for the two new observations.
# diff.hi: First difference in expected duration upper confidence bounds for the two new observations.
# dhr.pe: Change in hazard rate point estimates for the two new observations.
# dhr.se: Change in hazard rate standard errors for the two new observations.
# dhr.lo: Change in hazard rate lower confidence bounds for the two new observations.
# dhr.hi: Change in hazard rate upper confidence bounds for the two new observations.

### Function ###
cox.ed.gam <- function(cox.model, var.name, val1, val2, m = 1000, ci = .95, cluster = NULL, k = -1, var.name2 = NA, val3 = NA, var.name3 = NA, val4 = NA, val5 = NA){
require(mgcv)
require(MASS)
require(rms)
source("bootcov2.R")
n <- nrow(model.matrix(cox.model))

## Draw new data via bootstrapping ##
if(length(cluster) > 1){
boot.model <- bootcov2(cph(Surv(as.numeric(cox.model$y[ , 1]), as.numeric(cox.model$y[ , 2]), type = "right") ~ model.matrix(cox.model), x = TRUE, y = TRUE), maxit = 50, B = m, cluster = cluster)
boot.betas <- boot.model$boot.Coef
} else{
boot.model <- bootcov2(cph(Surv(as.numeric(cox.model$y[ , 1]), as.numeric(cox.model$y[ , 2]), type = "right") ~ model.matrix(cox.model), x = TRUE, y = TRUE), maxit = 50, B = m)
boot.betas <- boot.model$boot.Coef 
}

## Loop over bootstrap samples ## 
# For computing the rank for the new observations based on the actual cases #
rank.new <- matrix(NA, nrow = nrow(boot.betas), ncol = n + 2)
ed.new <- matrix(NA, nrow = nrow(boot.betas), ncol = n + 2)
dhr <- numeric(nrow(boot.betas))

# Begin loop
i <- 1
while(i <= nrow(boot.betas)){

# Bring together bootstrap sample for use in the GAM
gam.data <- data.frame(y = as.numeric(cox.model$y[boot.model$b.ind[ , i] , 1]), failed = as.numeric(cox.model$y[boot.model$b.ind[ , i] , 2]), rank.xb = rank(exp(model.matrix(cox.model)[boot.model$b.ind[ , 1], ] %*% boot.betas[i, ]), ties.method = "random"))

# Estimate GAM fit of DV from expected rank with bootstrap sample
gam.model <- gam(y ~ s(rank.xb, bs = "cr", k = k), data = subset(gam.data, failed == 1))

# Put together two new independent variable profiles
x1 <- x2 <- x <- model.matrix(cox.model)
x1[ , paste(var.name)] <- val1
x2[ , paste(var.name)] <- val2

if(is.character(var.name2)){
x1[ , paste(var.name2)] <- x2[ , paste(var.name2)] <- val3
} 

if(is.character(var.name3)){
x1[ , paste(var.name3)] <- val4
x2[ , paste(var.name3)] <- val5
}
 
# Compute exp(xB) for actual observations and the two new ones
xb.old <- exp(x %*% boot.betas[i, ])
xb.new1 <- median(exp(x1 %*% boot.betas[i, ]))
xb.new2 <- median(exp(x2 %*% boot.betas[i, ]))

# Rank exp(xB)
new.data <- c(xb.new1, xb.new2, xb.old)
rank.new[i, ] <- rank(new.data, ties.method = "random")

# Generate expected durations for the new and old observations from GAM fit
ed.new[i, ] <- predict(gam.model, newdata = data.frame(rank.xb = rank.new[i, ]), type = "response")

# Compute expected change in the hazard rate
dhr[i] <- (xb.new2 - xb.new1)/xb.new1
i <- i + 1
} # End loop

## Generate linear predictor rank for the new observations with the original Cox model coefficients ##
gam.data <- data.frame(y = as.numeric(cox.model$y[ , 1]), failed = as.numeric(cox.model$y[ , 2]), rank.xb = rank(exp(model.matrix(cox.model) %*% coef(cox.model)), ties.method = "random"))

# Compute exp(xB) for actual observations and the two new ones
xb.old <- exp(x %*% coef(cox.model))
xb.new1 <- median(exp(x1 %*% coef(cox.model)))
xb.new2 <- median(exp(x2 %*% coef(cox.model)))

# Rank exp(xB)
new.data <- c(xb.new1, xb.new2, xb.old)
rank.new <- rank(new.data, ties.method = "random")

# Estimate GAM fit of DV from expected rank with original sample
gam.model <- gam(y ~ s(rank.xb, bs = "cr", k = k), data = subset(gam.data, failed == 1))

# Generate expected duration from GAM fit 
gam.pred <- predict(gam.model, newdata = data.frame(rank.xb = 1:n), type = "response", se.fit = TRUE)
exp.dur <- gam.pred$fit
expdur.se <- gam.pred$se.fit

## Compute point estimates and standard errors ##
ed.pe <- predict(gam.model, newdata = data.frame(rank.xb = rank.new), type = "response")[1:2] 
ed.se <- apply(ed.new[ , 1:2], 2, sd)
ed.lo <- ed.pe - (qnorm(1 - (1 - ci)/2)*ed.se)
ed.hi <- ed.pe + (qnorm(1 - (1 - ci)/2)*ed.se)

diff.pe <- ed.pe[1] - ed.pe[2]
diff.se <- sd(ed.new[ , 1] - ed.new[ , 2])
diff.lo <- diff.pe - (qnorm(1 - (1 - ci)/2)*diff.se)
diff.hi <- diff.pe + (qnorm(1 - (1 - ci)/2)*diff.se)

dhr.pe <- median(dhr)
dhr.se <- sd(dhr)
dhr.lo <- quantile(dhr, prob = (1 - ci)/2, na.rm = TRUE)
dhr.hi <- quantile(dhr, prob = 1 - (1 - ci)/2, na.rm = TRUE) 

return(list(y = gam.data$y, failed = gam.data$failed, rank.xb = gam.data$rank.xb, exp.dur = as.vector(exp.dur), expdur.se = as.vector(expdur.se), x1 = x1, x2 = x2, ed.pe = ed.pe, ed.se = ed.se, ed.lo = ed.lo, ed.hi = ed.hi, diff.pe = diff.pe, diff.se = diff.se, diff.lo = diff.lo, diff.hi = diff.hi, dhr.pe = dhr.pe, dhr.se = dhr.se, dhr.lo = dhr.lo, dhr.hi = dhr.hi))
}